Read the data and visualize one instance (all axes) from each class and try to relate the shape (time series) you see with the gestures shown in Figure 1
library(ggplot2)
library(plotly)
library(tidyr)
library(tidyverse)
library(dplyr)
library(reshape2)
library(data.table)
# Load the three acceleration axes of the training set.
# Column 1 of every file is the class label; the remaining columns are the series.
xacc <- read.table("uWaveGestureLibrary_X_TRAIN")
yacc <- read.table("uWaveGestureLibrary_Y_TRAIN")
zacc <- read.table("uWaveGestureLibrary_Z_TRAIN")
class <- xacc[, 1]

# Row-wise cumulative sum of a series table, with the class label put back as
# the first column. apply() returns one column per input row, hence the t().
cumsum_rows <- function(series) {
  cbind(class = class, t(apply(series, 1, cumsum)))
}

# Summing acceleration once gives velocity, summing velocity gives position.
xvel <- cumsum_rows(xacc[, -1])
xpos <- cumsum_rows(xvel[, -1])

yvel <- cumsum_rows(yacc[, -1])
ypos <- cumsum_rows(yvel[, -1])

zvel <- cumsum_rows(zacc[, -1])
zpos <- cumsum_rows(zvel[, -1])
# Keep the first series of every class, sorted by class label.
len <- ncol(xpos)

# Rows of `pos` where each class value appears for the first time,
# reordered by ascending class.
first_by_class <- function(pos) {
  firsts <- pos[!duplicated(pos$class), ]
  firsts[order(firsts$class), ]
}

xpos <- as.data.frame(xpos)
ypos <- as.data.frame(ypos)
zpos <- as.data.frame(zpos)

xpos.first <- first_by_class(xpos)
ypos.first <- first_by_class(ypos)
zpos.first <- first_by_class(zpos)
# Plot the first instance of each class as a 3D cloud of positions.

# Gather the x/y/z position series stored in row `k` of the *.first tables
# (those tables are sorted by class) into one three-column matrix.
first_instance_xyz <- function(k) {
  series_cols <- 2:len # column 1 holds the class label
  cbind(
    t(xpos.first[k, series_cols]),
    t(ypos.first[k, series_cols]),
    t(zpos.first[k, series_cols])
  )
}

# Draw one instance matrix (columns x, y, z) as a 3D scatter plot.
plot_instance <- function(inst) {
  plot_ly(
    x = inst[, 1], y = inst[, 2], z = inst[, 3],
    type = "scatter3d", mode = "markers"
  )
}

# The plot calls stay at top level so each widget is auto-printed.
# class 1
inst1 <- first_instance_xyz(1)
plot_instance(inst1)
# class 2
inst2 <- first_instance_xyz(2)
plot_instance(inst2)
# class 3
inst3 <- first_instance_xyz(3)
plot_instance(inst3)
# class 4
inst4 <- first_instance_xyz(4)
plot_instance(inst4)
# class 5
inst5 <- first_instance_xyz(5)
plot_instance(inst5)
# class 6
inst6 <- first_instance_xyz(6)
plot_instance(inst6)
# class 7
inst7 <- first_instance_xyz(7)
plot_instance(inst7)
# class 8
inst8 <- first_instance_xyz(8)
plot_instance(inst8)
Comments on task a
It is not easy to compare the 3D trajectories with the 2D gesture sketches in Figure 1,
but my guesses are:
class 1 -> 4 class 2 -> 8 class 3 -> 7 class 4 -> 6 class 5 -> 5 class 6 -> 3 class 7 -> 2 class 8 -> 1
# Give every series an id, then reshape each axis from wide (one column per
# time step) to long (one row per series and time step).
xpos2 <- cbind(timeseriesid = seq_len(nrow(xpos)), xpos)
ypos2 <- cbind(timeseriesid = seq_len(nrow(ypos)), ypos)
zpos2 <- cbind(timeseriesid = seq_len(nrow(zpos)), zpos)
leng <- ncol(xpos2)
leng2 <- leng - 2 # number of time steps: all columns except id and class

# Reshape one wide position table to long format, storing the values in a
# column called `value_name`, and sort the result by series id.
to_long <- function(wide, value_name) {
  long <- reshape(
    wide,
    direction = "long",
    varying = list(names(wide)[3:leng]),
    v.names = value_name,
    idvar = c("timeseriesid", "class"),
    timevar = "time_index",
    times = seq_len(leng2)
  )
  long[order(long$timeseriesid), ]
}

xpos.long <- to_long(xpos2, "X")
ypos.long <- to_long(ypos2, "Y")
zpos.long <- to_long(zpos2, "Z")

# The cbind() below pairs X, Y and Z purely by row position, so make sure the
# three long tables really are in the same (series, time) order.
stopifnot(
  identical(xpos.long$timeseriesid, ypos.long$timeseriesid),
  identical(xpos.long$timeseriesid, zpos.long$timeseriesid),
  identical(xpos.long$time_index, ypos.long$time_index),
  identical(xpos.long$time_index, zpos.long$time_index)
)

xyz.long <- cbind(
  timeseriesid = xpos.long$timeseriesid,
  time_index = xpos.long$time_index,
  X = xpos.long$X,
  Y = ypos.long$Y,
  Z = zpos.long$Z,
  class = xpos.long$class
)

# PCA on the X, Y, Z columns of all series pooled together, using the
# correlation matrix. TRUE is spelled out because T can be reassigned.
PCA <- princomp(xyz.long[, 3:5], cor = TRUE)
print(summary(PCA, loadings = TRUE))
Importance of components:
Comp.1 Comp.2 Comp.3
Standard deviation 1.2724995 1.0380930 0.5505524
Proportion of Variance 0.5397517 0.3592124 0.1010360
Cumulative Proportion 0.5397517 0.8989640 1.0000000
Loadings:
Comp.1 Comp.2 Comp.3
X 0.209 0.908 0.362
Y 0.723 0.105 -0.683
Z 0.658 -0.404 0.635
# Replace the three position axes by their score on the first principal
# component, keeping the series id, time index and class of every row.
reduced_data <- data.frame(cbind(
  timeseriesid = xyz.long[, 1],
  timeindex = xyz.long[, 2],
  PCA1 = PCA$scores[, 1],
  class = xyz.long[, 6]
))

# Eight hand-picked pairs of series ids (the write-up says two series per class).
selected_ids <- list(
  c(38, 63), c(31, 101), c(112, 778), c(297, 623),
  c(235, 35), c(18, 28), c(199, 783), c(62, 337)
)

# Stack the rows of each pair in the order the pairs are listed.
sel_data <- do.call(
  rbind,
  lapply(selected_ids, function(ids) {
    reduced_data[reduced_data$timeseriesid %in% ids, ]
  })
)
sel_data$class <- as.factor(sel_data$class)
sel_data$timeseriesid <- as.factor(sel_data$timeseriesid)

ggplot(data = sel_data, aes(x = timeindex, y = PCA1, colour = class)) +
  geom_point()
# Without an explicit group, geom_line() joins every point of the same colour,
# so the two series of a class were drawn as one zig-zag line. Grouping by
# series id draws one line per series while still colouring by class.
ggplot(
  data = sel_data,
  aes(x = timeindex, y = PCA1, colour = class, group = timeseriesid)
) +
  geom_line()
Comments on task b
We performed PCA on the pooled position data of all time series. Component 1 gives the best variance coverage, at 54%, so it is chosen for the reduction to one dimension. The Comp.1 loadings are 0.209 for X, 0.723 for Y and 0.658 for Z, i.e. the weights of Y and Z are much larger than the weight of X, so the X data has very little impact on component 1. As the PCA results show, component 2 depends heavily on X.
We plotted 2 time series from each class. Classes 1 and 6 can be separated using component 1, but I cannot see any clear pattern for the other classes, except maybe class 5. The other classes are clustered together.
Since the effect of X on component 1 is very low, I did not expect the classes to be separated entirely; component 1 also covers only 54% of the variance.
It may also be wise to use acceleration data not the position data for PCA.
# PCA of the X, Y, Z positions within each class, followed by the pooled PCA.
xyz.long <- data.frame(xyz.long)

# Correlation-matrix PCA on the X, Y, Z columns (3 to 5) of `rows`.
# TRUE is spelled out because T can be reassigned.
fit_xyz_pca <- function(rows) {
  princomp(rows[, 3:5], cor = TRUE)
}

# One fit per class, in ascending class order. The fits are kept in a list
# named PCA1, PCA2, ... so that e.g. class_pca$PCA5 is the fit for class 5.
class_ids <- sort(unique(xyz.long$class))
class_pca <- lapply(class_ids, function(k) {
  fit_xyz_pca(xyz.long[xyz.long$class == k, ])
})
names(class_pca) <- paste0("PCA", class_ids)

for (fit in class_pca) {
  print(summary(fit, loadings = TRUE))
}

# Pooled PCA over all classes, printed last for comparison.
PCA <- fit_xyz_pca(xyz.long)
print(summary(PCA, loadings = TRUE))
Importance of components:
Comp.1 Comp.2 Comp.3
Standard deviation 1.2932492 1.0091473 0.5559931
Proportion of Variance 0.5574978 0.3394594 0.1030428
Cumulative Proportion 0.5574978 0.8969572 1.0000000
Loadings:
Comp.1 Comp.2 Comp.3
X 0.119 0.973 0.198
Y 0.693 -0.224 0.685
Z 0.711 -0.701
Importance of components:
Comp.1 Comp.2 Comp.3
Standard deviation 1.1286537 0.9457021 0.9120243
Proportion of Variance 0.4246197 0.2981175 0.2772628
Cumulative Proportion 0.4246197 0.7227372 1.0000000
Loadings:
Comp.1 Comp.2 Comp.3
X 0.535 0.814 0.224
Y 0.613 -0.192 -0.767
Z 0.581 -0.548 0.602
Importance of components:
Comp.1 Comp.2 Comp.3
Standard deviation 1.2156489 1.0027730 0.7187794
Proportion of Variance 0.4926008 0.3351846 0.1722146
Cumulative Proportion 0.4926008 0.8277854 1.0000000
Loadings:
Comp.1 Comp.2 Comp.3
X 0.709 0.705
Y 0.371 0.847 -0.380
Z -0.599 0.532 0.599
Importance of components:
Comp.1 Comp.2 Comp.3
Standard deviation 1.2855781 0.9914895 0.6035210
Proportion of Variance 0.5509037 0.3276838 0.1214125
Cumulative Proportion 0.5509037 0.8785875 1.0000000
Loadings:
Comp.1 Comp.2 Comp.3
X 0.702 0.711
Y 0.674 0.285 -0.681
Z -0.230 0.958 0.173
Importance of components:
Comp.1 Comp.2 Comp.3
Standard deviation 1.4038307 0.9685920 0.30180949
Proportion of Variance 0.6569135 0.3127235 0.03036299
Cumulative Proportion 0.6569135 0.9696370 1.00000000
Loadings:
Comp.1 Comp.2 Comp.3
X 0.245 0.970
Y 0.685 -0.177 0.707
Z 0.686 -0.170 -0.708
Importance of components:
Comp.1 Comp.2 Comp.3
Standard deviation 1.3400570 0.9638120 0.52470332
Proportion of Variance 0.5985843 0.3096445 0.09177119
Cumulative Proportion 0.5985843 0.9082288 1.00000000
Loadings:
Comp.1 Comp.2 Comp.3
X 0.291 0.955
Y -0.671 0.251 -0.698
Z -0.682 0.162 0.713
Importance of components:
Comp.1 Comp.2 Comp.3
Standard deviation 1.2971922 0.8883869 0.7266781
Proportion of Variance 0.5609026 0.2630771 0.1760203
Cumulative Proportion 0.5609026 0.8239797 1.0000000
Loadings:
Comp.1 Comp.2 Comp.3
X 0.596 0.487 0.639
Y 0.632 0.207 -0.747
Z 0.496 -0.849 0.184
Importance of components:
Comp.1 Comp.2 Comp.3
Standard deviation 1.2203138 0.9683089 0.7571078
Proportion of Variance 0.4963886 0.3125407 0.1910707
Cumulative Proportion 0.4963886 0.8089293 1.0000000
Loadings:
Comp.1 Comp.2 Comp.3
X 0.664 0.253 0.704
Y 0.337 -0.941
Z 0.668 0.223 -0.710
Importance of components:
Comp.1 Comp.2 Comp.3
Standard deviation 1.2724995 1.0380930 0.5505524
Proportion of Variance 0.5397517 0.3592124 0.1010360
Cumulative Proportion 0.5397517 0.8989640 1.0000000
Loadings:
Comp.1 Comp.2 Comp.3
X 0.209 0.908 0.362
Y 0.723 0.105 -0.683
Z 0.658 -0.404 0.635
Comments on task c
Classes 5 and 6 give loadings of similar magnitude, but some of them have opposite signs. In the first component the X loading is similar, but the Y and Z loadings have opposite signs; similar observations hold for Comp.2 and Comp.3. I would say these classes are similar gestures performed in opposite directions, like gestures 3 & 4 or 5 & 6 in Figure 1.
I expected better variance coverage from PCA when it is done per class, but that was not the case: the variance covered by Comp.1 in each class is close to that of the first component of the overall PCA.
# Euclidean distance between every pair of series, computed separately on each
# acceleration axis (column 1 is the class label and is left out).
# The Y and Z distances previously reused xacc, which made the combined
# distance just three times the X distance.
distx <- dist(xacc[, -1])
disty <- dist(yacc[, -1])
distz <- dist(zacc[, -1])
# Sum the per-axis distances to get the final distance measure.
distall <- distx + disty + distz
# Multi-dimensional scaling into a 2D feature space.
Classes <- as.factor(class)
fitdata <- cmdscale(distall, k = 2)
fitdata2 <- data.frame(
  cord1 = fitdata[, 1],
  cord2 = fitdata[, 2],
  class = Classes,
  Classes = Classes
)
ggplot(data = fitdata2, aes(x = cord1, y = cord2, colour = Classes)) +
  geom_point()
Comments on task d
I did not observe any clustering of the classes, except for class 4, which spans the left side of the graph, and class 3, which spans the right side. Class 2 appears centered in the graph, but overall the classes are mixed together. This is not a viable method for deciding classes.